In [1]:
import os
import sodapy
from sodapy import Socrata
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import contextily as ctx
%matplotlib inline

# Lift pandas' display truncation so every row and column is shown.
# NOTE(review): with display.max_rows=None, accidentally echoing one of the
# large frames loaded below would try to render every row; prefer .head().
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

# Echo the installed geopandas version as the cell output.
gpd.__version__
Out[1]:
'0.7.0'

Buffalo 311 Data

In [2]:
# Alternative (kept for reference, not executed): read the Buffalo 311 service requests dataset directly from its GeoJSON URL
#url='https://data.buffalony.gov/resource/whkc-e5vr.geojson?$limit=1000000'
#buffalo_311=gpd.read_file(url)
In [3]:
# SODA API method for reading in the Buffalo 311 service requests dataset.
socrata_domain='data.buffalony.gov'
socrata_dataset_identifier='whkc-e5vr'
# os.environ.get returns None when SODAPY_APPTOKEN is not set.
app_token=os.environ.get('SODAPY_APPTOKEN')
client=Socrata(socrata_domain,app_token)
# Ask for GeoJSON and pass the row cap through sodapy's own `limit` argument
# rather than appending a '?$limit=...' query string to `content_type`.
# The result is bound to a descriptive name so the builtin `dict` is not
# shadowed for the rest of the notebook.
requests_geojson=client.get(
    socrata_dataset_identifier,
    content_type='geojson',
    limit=1000000,
    # where='',  # optional SoQL filter placeholder
    select='latitude,longitude,address_number,address_line_1,type',
)
buffalo_311=gpd.GeoDataFrame.from_features(requests_geojson)
In [4]:
# Build point geometries from the longitude/latitude columns (cast to float)
# and attach them to the frame, declaring epsg:4326 as the initial CRS.
lon_values = buffalo_311['longitude'].astype(float)
lat_values = buffalo_311['latitude'].astype(float)
request_points = gpd.points_from_xy(lon_values, lat_values)
buffalo_311 = gpd.GeoDataFrame(buffalo_311, geometry=request_points, crs='epsg:4326')
In [5]:
# (rows, columns) of the loaded frame, before any rows are dropped.
buffalo_311.shape
Out[5]:
(809778, 6)
In [6]:
# Drop every row that has a NaN in any column. The frame is rebound to the
# filtered result instead of being mutated with inplace=True, which keeps the
# step consistent with the other cells and chainable.
buffalo_311=buffalo_311.dropna(how='any',axis=0)
# (rows, columns) after removing incomplete rows.
buffalo_311.shape
Out[6]:
(730975, 6)
In [7]:
# Remove rows without a geometry and report how many were dropped.
nrows=buffalo_311.shape[0]
buffalo_311=buffalo_311.loc[buffalo_311.geometry.notnull()]
krows=buffalo_311.shape[0]
removed=nrows-krows
# An empty frame would make the division raise ZeroDivisionError; report 0%.
pctremoved=(removed/nrows)*100 if nrows else 0.0
print(("Original number of rows = {}"
       ", Number of rows missing coordinates = {}"
       ", Percent missing data = {:.1f}%").format(nrows,removed,pctremoved))
# Flag the dataset when more than 10% of the rows had no usable location.
if pctremoved>10:
    print("WARNING: Percent missing location data exceeds recommended limit!")

# Reproject from the initial epsg:4326 to epsg:3857 for the map cells below.
buffalo_311=buffalo_311.to_crs('epsg:3857')
Original number of rows = 730975, Number of rows missing coordinates = 0, Percent missing data = 0.0%
In [19]:
# First word of the street name: the text before the first space in
# 'address_line_1' (e.g. 'Lombard St' -> 'Lombard').
street_first_word = buffalo_311['address_line_1'].str.split(' ', n=1).str[0]
buffalo_311['name'] = street_first_word

# Short address key: "<address_number> <first word of street name>".
buffalo_311['address'] = (
    buffalo_311['address_number'] + ' ' + buffalo_311['name']
)

buffalo_311.head()
Out[19]:
geometry latitude longitude address_number type address_line_1 name address
0 POINT (-8776289.546 5292042.105) 42.86892484168224 -78.83875036727827 43 Rodents (Req_Serv) WALTER WALTER 43 WALTER
2 POINT (-8774686.879 5301540.598) 42.931429915813446 -78.82435336976692 614 Totes Replace (Req_Serv) WYOMING WYOMING 614 WYOMING
3 POINT (-8776103.599 5294913.533) 42.88782703051763 -78.83707997643731 INTERSECTION Pot Hole (Req_Serv) Lombard St Lombard INTERSECTION Lombard
4 POINT (-8775361.826 5302153.031) 42.93545786862617 -78.83041652188895 127 Totes Replace (Req_Serv) RICHLAWN RICHLAWN 127 RICHLAWN
5 POINT (-8775052.626 5304133.467) 42.948481344124 -78.82763893333582 25 Housing Violations (Req_Serv) LISBON LISBON 25 LISBON

Buffalo-clean Data

In [9]:
# Load the buffalo-clean dataset from its Google Drive download link.
buffalo_clean_url = 'https://drive.google.com/uc?id=1jIBr3W0p28VJj0TWJGtqKjxu-ejyenw0'
buffalo_clean = pd.read_csv(buffalo_clean_url, low_memory=False)
In [10]:
# Split each address on spaces into at most three pieces
# (first token, second token, remainder).
address_parts = buffalo_clean['address'].str.split(' ', n=2, expand=True)

# Overwrite 'address' with "<first token> <second token>" so it has the same
# shape as the short address key built for the 311 data. Rows where either
# piece is missing end up as NaN.
buffalo_clean['address'] = address_parts[0].str.cat(address_parts[1], sep=' ')
In [11]:
# Inner-join the 311 requests (left) with the buffalo-clean records (right)
# on the shared 'address' key; only addresses present in both are kept.
clean_311 = pd.merge(
    left=buffalo_311,
    right=buffalo_clean,
    on='address',
    how='inner',
)
clean_311.shape
Out[11]:
(133909, 72)
In [12]:
# Keep only the rows whose 'vacant_' flag equals 'Y'.
is_vacant = clean_311['vacant_'].eq('Y')
clean_311_y = clean_311.loc[is_vacant]
clean_311_y.shape
Out[12]:
(22590, 72)
In [13]:
# Narrow the vacant-property frame to the columns used by the map cells.
keep_columns = ['address', 'type', 'geometry', 'latitude', 'longitude']
clean_311_y = clean_311_y[keep_columns]
clean_311_y.head()
Out[13]:
address type geometry latitude longitude
30 383 MACKINAW Parking Issues (Req_Serv) POINT (-8778172.753 5291895.079) 42.86795684069256 -78.85566750297815
31 383 MACKINAW Missed Pickup 2_Piece Large Trash (Req_Serv) POINT (-8778172.753 5291895.079) 42.86795684069256 -78.85566750297815
32 383 MACKINAW Parking Issues (Req_Serv) POINT (-8778172.753 5291895.079) 42.86795684069256 -78.85566750297815
33 383 MACKINAW Street Snow Plowing (Req_Serv) POINT (-8778172.753 5291895.079) 42.86795684069256 -78.85566750297815
34 383 MACKINAW Rodents (Req_Serv) POINT (-8778172.753 5291895.079) 42.86795684069256 -78.85566750297815

Buffalo Neighborhoods

In [14]:
# Neighborhood polygons for the polygon layer, read from the GeoJSON endpoint
# and reprojected to epsg:3857 in one step.
url='https://data.buffalony.gov/resource/pg8k-g5iz.geojson?$limit=1000000'
hoods=gpd.read_file(url).to_crs('epsg:3857')
In [15]:
# Remove neighborhood rows without a geometry and report how many were dropped.
nrows=hoods.shape[0]
hoods=hoods.loc[hoods.geometry.notnull()]
krows=hoods.shape[0]
removed=nrows-krows
# An empty frame would make the division raise ZeroDivisionError; report 0%.
pctremoved=(removed/nrows)*100 if nrows else 0.0
print(("Original number of rows = {}"
       ", Number of rows missing coordinates = {}"
       ", Percent missing data = {:.1f}%").format(nrows,removed,pctremoved))
# Flag the dataset when more than 10% of the rows had no usable location.
if pctremoved>10:
    print("WARNING: Percent missing location data exceeds recommended limit!")
Original number of rows = 79, Number of rows missing coordinates = 0, Percent missing data = 0.0%
In [16]:
# Static map of addresses common to the vacant buffalo-clean records and the
# Buffalo 311 service requests, drawn over the neighborhood polygons.

# Explicit figure/axes so every layer and label targets the same Axes.
fig, ax = plt.subplots(figsize=(10,10))
hoods.plot(ax=ax,alpha=0.5,edgecolor='black',color='dodgerblue',linewidth=2)
# The Stamen TonerLite tiles have been retired and are no longer in current
# provider lists; CartoDB Positron is a light basemap of the same kind and
# matches the tile set used by the interactive map.
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
clean_311_y.plot(ax=ax,marker='o',color='red',markersize=50,alpha=1,edgecolor='black',linewidth=1)
ax.set_title('Buffalo Vacant Homes',fontsize=20)
ax.set_axis_off();
In [18]:
# Interactive Bokeh map of addresses common to the vacant buffalo-clean
# records and the Buffalo 311 service requests, over the neighborhood polygons.

# Bokeh imports, grouped at the top of the cell.
from bokeh.io import output_notebook, show, output_file, save
from bokeh.models import (HoverTool, GeoJSONDataSource, LogColorMapper, ColorBar)
from bokeh.plotting import figure
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from bokeh.transform import linear_cmap,log_cmap
import bokeh.palettes

# Basemap tiles (CartoDB Positron, retina variant).
tileProvider = get_provider('CARTODBPOSITRON_RETINA')

output_notebook()

TOOLS="pan,wheel_zoom,box_zoom,reset,save"

# Borderless figure with an empty title and no outline.
f = figure(title='',tools=TOOLS,
           plot_width=800, plot_height=700,
           outline_line_color=None,
           min_border=0,min_border_left=0,min_border_right=0,
           min_border_top=0,min_border_bottom=0)

f.add_tile(tileProvider)

f.title.text_font_style='italic'
f.title.text_font_size='14pt'
f.axis.visible = False

# Both layers are put in epsg:3857 before serialising. buffalo_311 and hoods
# were already reprojected to that CRS in earlier cells, so this is a
# defensive step.
point=clean_311_y.to_crs('epsg:3857')
poly=hoods.to_crs('epsg:3857')

point_source= GeoJSONDataSource(geojson=point.to_json())
poly_source= GeoJSONDataSource(geojson=poly.to_json())

# Neighborhood polygons as a translucent layer.
areas=f.patches('xs','ys',source=poly_source,
               fill_color='dodgerblue',fill_alpha=0.5,line_color='black',line_width=0.5)

# One red marker per matched vacant-property service request.
circles=f.circle('x','y', size=10,fill_color='red',line_color='black',
                 fill_alpha=1,source=point_source)

# Hover applies to the point layer only. Labels carry no trailing colon
# because Bokeh adds its own separator after each tooltip label.
c_hover=HoverTool(renderers=[circles])
c_hover.point_policy='follow_mouse'
c_hover.tooltips=[('Type', '@type'),
                 ('Address', '@address')]
f.add_tools(c_hover)

# NOTE(review): with both output_notebook() and output_file() active, show()
# is expected to render inline and also write clean_311_y.html; confirm.
output_file('clean_311_y.html',title='clean_311_y')

show(f)
Loading BokehJS ...
In [ ]: